3,143
---
title: "US Broadband"
author: "Antony Rono"
output:
flexdashboard::flex_dashboard:
orientation: rows
vertical_layout: fill
source_code: embed
editor_options:
chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(flexdashboard)
library(tidyverse)
library(tidytuesdayR)
library(scales)
library(extrafont)
library(zipcodeR)
library(janitor)
library(magrittr)
library(plotly)
library(tigris)
library(ggthemes)
library(highcharter)
options(tigris_use_cache = TRUE)
options(scipen = 99)
theme_set(theme_light())
```
```{r Load, include=FALSE}
tt <- tt_load("2021-05-11")
broadband <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-11/broadband.csv')
broadband_zip <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-11/broadband_zip.csv')
```
```{r Cleaning Data, include=FALSE}
broadband_county <- broadband %>%
clean_names() %>%
mutate(state = state.name[match(st, state.abb)],
state = ifelse(st == "DC", "District of Columbia", state),
broadband_availability_per_fcc = parse_number(broadband_availability_per_fcc, na = "-"),
broadband_usage = parse_number(broadband_usage, na = "-"),
county_id = sprintf("%05d", county_id)
) %>%
mutate_at(c("county_name", "state"), str_trim)
skimr::skim(broadband)
```
```{r Adding County Population to broadband df, include=FALSE}
library(readxl)
population <- read_xlsx("US Population By County.xlsx", skip = 3)
population %<>%
rename(county_name = ...1) %>%
clean_names() %>%
filter(!is.na(census)) %>%
mutate(county_name = str_remove(county_name, "^[.]")) %>%
separate(county_name, c("county_name", "state"), sep = ",") %>%
mutate_at(c("county_name", "state"), str_trim) %>%
select(county_name, state, population_2017 = x2017, population_2019 = x2019) %>%
mutate(county_name = case_when(
county_name == "Petersburg Borough" ~ "Petersburg Census Area",
county_name == "Oglala Lakota County" ~ "Oglala County",
TRUE ~ county_name
))
broadband_with_population <- broadband_county %>%
inner_join(population, by = c("county_name", "state")) %>%
mutate(county_state = paste0(str_remove(county_name, " County"),", ", state))
```
```{r Cleaning and Joining with zip code db, include=FALSE}
broadband_zip %<>%
clean_names()
skimr::skim(broadband_zip)
broadband_zip_joined <- broadband_zip %>%
mutate(postal_code = sprintf("%05d", postal_code)) %>%
left_join(zip_code_db, by = c("postal_code" = "zipcode")) %>%
mutate(state = state.name[match(st, state.abb)],
state = ifelse(st == "DC", "District of Columbia", state)
) %>%
mutate(city_state = paste0(major_city,", ", state)) %>%
select(-county)
```
Overall {data-icon="fa-map-marker-alt"}
=======================================================================
Row {data-height=250}
-----------------------------------------------------------------------
### Total Number of Counties
```{r count of counties}
valueBox(value = comma(nrow(broadband_county %>% distinct(county_id))), caption = "Number of Counties", icon = "fa-map-pin", color = "green")
```
### Number of Counties with more than 50% Broadband Acess
```{r valuebox of broadband access}
gauge(broadband_county %>% filter(broadband_availability_per_fcc > 0.5) %>% nrow,
min = 0,
max = nrow(broadband_county),
gaugeSectors(success = c(0.75*nrow(broadband_county), nrow(broadband_county)),
warning = c(0.5*nrow(broadband_county),(0.75*nrow(broadband_county)-1)),
danger = c(0, 0.5*nrow(broadband_county)))
)
```
### Number of Counties with more than 25% Broadband Usage
```{r valuebox of broadband usage}
gauge(broadband_county %>% filter(broadband_usage > 0.25) %>% nrow,
min = 0,
max = nrow(broadband_county),
gaugeSectors(success = c(0.75*nrow(broadband_county), nrow(broadband_county)),
warning = c(0.5*nrow(broadband_county),(0.75*nrow(broadband_county)-1)),
danger = c(0, 0.5*nrow(broadband_county)))
)
```
Row{data-height=750}
-----------------------------------------------------------------------
### Broadband Access in the Different Counties
```{r Broadband Access in Different Counties}
hcmap(
map = "countries/us/us-all-all-highres", # high resolution world map
data = broadband_with_population %>% rename(fips = county_id) , # name of dataset
joinBy = "fips",
value = "broadband_availability_per_fcc",
showInLegend = FALSE, # hide legend
nullColor = "#FFFFFF",
download_map_data = TRUE,
dataLabels = list(enabled = TRUE, format = '{point.name}')
) %>%
#hc_colorAxis(minColor = "#FFFFCC", maxColor = "blue") %>%
hc_mapNavigation(enabled = FALSE) %>%
hc_legend("none")
```
### Broadband Usage in the Different Counties
```{r Broadband Usage in Different Counties}
hcmap(
map = "countries/us/us-all-all-highres", # high resolution world map
data = broadband_with_population %>% rename(fips = county_id) , # name of dataset
joinBy = "fips",
value = "broadband_usage",
showInLegend = FALSE, # hide legend
nullColor = "#FFFFFF",
download_map_data = TRUE,
dataLabels = list(enabled = TRUE, format = '{point.name}')
) %>%
#hc_colorAxis(minColor = "#FFFFCC", maxColor = "blue") %>%
hc_mapNavigation(enabled = FALSE) %>%
hc_legend("none")
```
Relationships {data-orientation=columns data-icon="fa-chart-line"}
=======================================================================
Column
-----------------------------------------------------------------------
### Relationship Between Broadband Usage and Broadband Availability
```{r Broadband Usage by Broadband Availability}
p <- broadband_with_population %>%
ggplot(aes(broadband_availability_per_fcc,broadband_usage, color = county_state )) +
geom_point() +
geom_smooth(color = "blue", method = "loess")+
theme(legend.position = "none") +
labs(y = "Broadband Usage in 2019",
x = "Broadband Availability in 2017")
p
```
Column
-----------------------------------------------------------------------
### Relationship Between Broadband Usage and Population Size
```{r Broadband Usage by Population Size}
p <- broadband_with_population %>%
ggplot(aes( broadband_usage,population_2019, color = county_state )) +
geom_point() +
geom_smooth(color = "blue", method = "lm")+
theme(legend.position = "none") +
scale_x_continuous(labels = percent) +
scale_y_log10(labels = comma) +
coord_flip() +
expand_limits(y = 10000)+
labs(x = "Broadband Usage in 2019",
y = "Population in 2019 (Log Scale)")
p
```
### Relationship Between Broadband Usage and Median Income
```{r Broadband Usage by HH Income}
p <- broadband_zip_joined %>%
filter(!is.na(median_household_income)) %>%
ggplot(aes(median_household_income, broadband_usage , color = city_state )) +
geom_point() +
geom_smooth(color = "blue", method = "lm")+
scale_x_log10(labels = comma) +
scale_y_continuous(labels = percent, limits = c(0,1)) +
theme(legend.position = "none") +
labs(y = "Broadband Usage in 2020",
x = "Median Household Income")
p
```
Column
-----------------------------------------------------------------------
### Relationship Between Broadband Usage and Population Density
```{r Broadband Usage by Population density}
p <- broadband_zip_joined %>%
filter(population > quantile(population, 0.2, na.rm = TRUE)) %>%
ggplot(aes(population_density, broadband_usage , color = city_state )) +
geom_point() +
geom_smooth(color = "blue", size =2, method = "lm")+
scale_x_log10(labels = comma) +
scale_y_continuous(labels = percent) +
theme(legend.position = "none") +
labs(y = "Broadband Usage in 2020",
x = "Population Density (Log Scale)")
p
```
### Relationship Between Broadband Usage and Occupied Housing Units
```{r, Broadband Usage by Occupied Housing Units}
p <- broadband_zip_joined %>%
filter(!is.na(occupied_housing_units), occupied_housing_units > 0) %>%
ggplot(aes(occupied_housing_units, broadband_usage , color = city_state )) +
geom_point() +
geom_smooth(color = "blue", method = "lm")+
scale_x_log10(labels = comma) +
scale_y_continuous(labels = percent, limits = c(0,1)) +
theme(legend.position = "none") +
labs(y = "Broadband Usage in 2020",
x = "Occupied Housing Units")
p
```